rm(list=ls())
library(tidyverse)
library(rdrobust)
library(rdpower)
library(AER)

##### LOAD MY_schools DATASET #####
# Set working directory to where datasets are stored
MY_schools <- read.csv("MY_schools.csv")

##### RECODE ETHNIC DISCRIMINATION VARIABLE FOR ANALYSIS ##### 
MY_schools <- MY_schools %>%
  rowwise() %>%
  mutate(EthnicDis = mean(c(Ethnic_PM,
                            Ethnic_Party,
                            Ethnic_Rights)))

colnames(MY_schools)

##### DESCRIPTIVE STATISTICS (Table OA2c.17) #####
sum_stats <- MY_schools %>%
  dplyr::select(Reform,Bilingual,Running,EthnicDis,
                Ethnic_PM,Ethnic_Rights,Ethnic_Party,
                Female,LangHomeBi,SecUrban,HighEduc,
                Employed,HouseIncome,
                ContactQuant,ContactQual) %>%
  psych::describe() %>%
  data.frame() # summary statistics
sum_stats <- sum_stats[,c(2:5, 8:9,11)]

##### BIRTH YEAR AND LANGUAGE OF INSTRUCTION #####
###### Respondents' Birth Years and Language (Figure 2) ###### 
hist_data <- MY_schools %>% 
  dplyr::group_by(BirthYear, Bilingual) %>%
  summarize(n = n()) %>%
  filter(Bilingual %in% c(0,1)) %>%
  mutate(prop = n / sum(n),
         freq = sum(n)) %>%
  filter(Bilingual == 1)

treat_status.p <- ggplot(hist_data, aes(x = BirthYear, 
                                        y = prop)) +
  geom_point(alpha = 0.5) +
  geom_line(linetype = "dotted",
            linewidth = 0.5) +
  geom_vline(xintercept = 1990,
             linetype = "dashed",
             linewidth = 0.2) + 
  scale_x_continuous(name = "Birth Year",
                     breaks = c(1985:1995)) +
  scale_y_continuous(name = "Proportion\n(English Instruction)",
                     limits = c(0,1)) +
  #ggtitle("All Respondents") +
  theme_bw() +
  theme(text=element_text(size=6))

treat_status.p <- treat_status.p +
  geom_bar(stat="identity",
           data = hist_data,
           aes(x = BirthYear,
               y = freq/1000),
           alpha = 0.5,
           width = 0.8)

###### Birth Year Predicts Language of Instruction (Table OA2d.18) ###### 
m1.LOI <- lm(Bilingual ~ Reform + 
               Female + LangHomeBi +
               SecUrban + HighEduc +
               Employed + HouseIncome,
             data = MY_schools) 
coeftest(m1.LOI, vcov. = vcovHC(m1.LOI, type = "HC0"))

m2.LOI <- glm(Bilingual ~ Reform + 
                Female + LangHomeBi +
                SecUrban + HighEduc +
                Employed + HouseIncome,
              data = MY_schools,
              family = "binomial") 
coeftest(m2.LOI, vcov. = vcovHC(m2.LOI, type = "HC0"))

##### RESULTS (Table 2 and Table OA2e.19) ##### 
m1 <- rdrobust(y = MY_schools$EthnicDis,x = MY_schools$Running,
               fuzzy = MY_schools$Bilingual,
               all = T,cluster = MY_schools$State,
               bwselect = "mserd",p = 1)
summary(m1) 

m2 <- rdrobust(y = MY_schools$EthnicDis,x = MY_schools$Running,
               fuzzy = MY_schools$Bilingual,all = T,
               cluster = MY_schools$State, bwselect = "mserd",
               covs = MY_schools$Female +
                 MY_schools$LangHomeBi +
                 MY_schools$SecUrban +
                 MY_schools$HighEduc +
                 MY_schools$Employed +
                 MY_schools$HouseIncome,
               p = 1)
summary(m2) 

##### EX-POST POWER ANALYSIS (Figure OA2f.6) ##### 
rdpower(data = cbind(MY_schools$EthnicDis,MY_schools$Running),
        fuzzy = MY_schools$Bilingual,
        cluster = MY_schools$State,
        tau = m1$coef[3],
        plot = T,
        graph.range = c(-1,1))

rdpower(data = cbind(MY_schools$EthnicDis,MY_schools$Running),
        fuzzy = MY_schools$Bilingual,
        covs = MY_schools$Female +
          MY_schools$LangHomeBi +
          MY_schools$SecUrban +
          MY_schools$HighEduc +
          MY_schools$Employed +
          MY_schools$HouseIncome,
        cluster = MY_schools$State,
        tau = m2$coef[3],
        graph.range = c(-1,1),
        plot = T)

##### DIFFERENT MODEL SPECIFICATIONS ##### 
###### Epanechnikov Kernel Weights (Table OA2g.20) ###### 
m3 <- rdrobust(y = MY_schools$EthnicDis,x = MY_schools$Running,
               fuzzy = MY_schools$Bilingual,all = T,
               cluster = MY_schools$State,
               bwselect = "mserd",p = 1,
               kernel = "epanechnikov")
summary(m3) 

m4 <- rdrobust(y = MY_schools$EthnicDis,x = MY_schools$Running,
               fuzzy = MY_schools$Bilingual,all = T,
               cluster = MY_schools$State,
               bwselect = "mserd", p = 1,
               covs = MY_schools$Female +
                 MY_schools$LangHomeBi +
                 MY_schools$SecUrban +
                 MY_schools$HighEduc +
                 MY_schools$Employed +
                 MY_schools$HouseIncome,
               kernel = "epanechnikov")
summary(m4) 

###### Different Bandwidths on Both Sides of Cutoff (Table OA2g.21) ###### 
m5 <- rdrobust(y = MY_schools$EthnicDis,x = MY_schools$Running,
               fuzzy = MY_schools$Bilingual,all = T,
               cluster = MY_schools$State,
               bwselect = "msetwo",p = 1)
summary(m5) # bw = (687.123,634.652)

m6 <- rdrobust(y = MY_schools$EthnicDis,x = MY_schools$Running,
               fuzzy = MY_schools$Bilingual,all = T,
               cluster = MY_schools$State,
               bwselect = "msetwo", p = 1,
               covs = MY_schools$Female +
                 MY_schools$LangHomeBi +
                 MY_schools$SecUrban +
                 MY_schools$HighEduc +
                 MY_schools$Employed +
                 MY_schools$HouseIncome)
summary(m6) # bw = (690.353,614.008)

###### Placebo Cutoff = -365 days (Table OA2g.22) ###### 
m7 <- rdrobust(y = MY_schools$EthnicDis,x = MY_schools$Running,
               fuzzy = MY_schools$Bilingual,all = T,
               cluster = MY_schools$State,
               bwselect = "mserd",
               p = 1, c = -365)
summary(m7) 

m8 <- rdrobust(y = MY_schools$EthnicDis,x = MY_schools$Running,
               fuzzy = MY_schools$Bilingual,all = T,
               cluster = MY_schools$State,
               bwselect = "mserd", p = 1,
               covs = MY_schools$Female +
                 MY_schools$LangHomeBi +
                 MY_schools$SecUrban +
                 MY_schools$HighEduc +
                 MY_schools$Employed +
                 MY_schools$HouseIncome,
               c = -365)
summary(m8) 

###### Placebo Cutoff = 365 days (Table OA2g.23) ###### 
m9 <- rdrobust(y = MY_schools$EthnicDis,x = MY_schools$Running,
               fuzzy = MY_schools$Bilingual,all = T,
               cluster = MY_schools$State,
               bwselect = "mserd", p = 1,
               c = 365)
summary(m9) 

m10 <- rdrobust(y = MY_schools$EthnicDis,x = MY_schools$Running,
                fuzzy = MY_schools$Bilingual,all = T,
                cluster = MY_schools$State,
                bwselect = "mserd", p = 1,
                covs = MY_schools$Female +
                  MY_schools$LangHomeBi +
                  MY_schools$SecUrban +
                  MY_schools$HighEduc +
                  MY_schools$Employed +
                  MY_schools$HouseIncome,
                c = 365)
summary(m10) 

##### INTERGROUP CONTACT ##### 
###### Reform Does Not Predict Contact Quantity (Table OA2h.24) ######  
m1.contactQuant <- rdrobust(y = MY_schools$ContactQuant,x = MY_schools$Running,
                            all = T, p = 1,
                            cluster = MY_schools$State,
                            bwselect = "mserd")
summary(m1.contactQuant)

m2.contactQuant <- rdrobust(y = MY_schools$ContactQuant,x = MY_schools$Running,
                            all = T,p = 1,
                            cluster = MY_schools$State,
                            bwselect = "mserd",
                            covs = MY_schools$Female +
                              MY_schools$LangHomeBi +
                              MY_schools$SecUrban +
                              MY_schools$HighEduc +
                              MY_schools$Employed +
                              MY_schools$HouseIncome)
summary(m2.contactQuant)

###### Reform Does Not Predict Contact Quality (Table OA2h.25) ######  
m1.contactQual <- rdrobust(y = MY_schools$ContactQual,x = MY_schools$Running,
                           all = T, p = 1,
                           cluster = MY_schools$State,
                           bwselect = "mserd")
summary(m1.contactQual)

m2.contactQual <- rdrobust(y = MY_schools$ContactQual,x = MY_schools$Running,
                           all = T,p = 1,
                           cluster = MY_schools$State,
                           bwselect = "mserd",
                           covs = MY_schools$Female +
                             MY_schools$LangHomeBi +
                             MY_schools$SecUrban +
                             MY_schools$HighEduc +
                             MY_schools$Employed +
                             MY_schools$HouseIncome)
summary(m2.contactQual)

###### Bilingual Instruction Does Not Predict Contact Quantity (Table OA2h.26) ######  
m3.contactQuant <- rdrobust(y = MY_schools$ContactQuant,
                            x = MY_schools$Running,
                            fuzzy = MY_schools$Bilingual,
                            all = T,
                            cluster = MY_schools$State,
                            bwselect = "mserd",
                            p = 1)
summary(m3.contactQuant) 

m4.contactQuant <- rdrobust(y = MY_schools$ContactQuant,
                            x = MY_schools$Running,
                            fuzzy = MY_schools$Bilingual,
                            all = T,
                            cluster = MY_schools$State,
                            bwselect = "mserd",
                            covs = MY_schools$Female +
                              MY_schools$LangHomeBi +
                              MY_schools$SecUrban +
                              MY_schools$HighEduc +
                              MY_schools$Employed +
                              MY_schools$HouseIncome,
                            p = 1)
summary(m4.contactQuant)

###### Bilingual Instruction Does Not Predict Contact Quality (Table OA2h.27) ######  
m3.contactQual <- rdrobust(y = MY_schools$ContactQual,
                           x = MY_schools$Running,
                           fuzzy = MY_schools$Bilingual,
                           all = T,
                           cluster = MY_schools$State,
                           bwselect = "mserd",
                           p = 1)
summary(m3.contactQual)

m4.contactQual <- rdrobust(y = MY_schools$ContactQual,
                           x = MY_schools$Running,
                           fuzzy = MY_schools$Bilingual,
                           all = T,
                           cluster = MY_schools$State,
                           bwselect = "mserd",
                           covs = MY_schools$Female +
                             MY_schools$LangHomeBi +
                             MY_schools$SecUrban +
                             MY_schools$HighEduc +
                             MY_schools$Employed +
                             MY_schools$HouseIncome,
                           p = 1)
summary(m4.contactQual)






